* Set up log
cd $ohie
cap log close
global sysdate: disp %tdYYNNDD  date("`c(current_date)'", "DMY")
qui log using 	"./logs/global_polynomial_all_specs_$sysdate.log", replace

* Set up timer
timer clear 1
timer on 1 

/*----------------------------------------------------------------------*/
/* PROGRAM: global_polynomial_all_specs.do				*/
/*									*/
/* PURPOSE:								*/
/* [*]	This code outputs the data necessary to create the figures in 	*/
/*	the paper that show the MTE with covariates:			*/
/*		- linmte_ptiles_Y_num_preutilization_line		*/
/* 		- smte_Y_num_binned					*/
/* NOTES:								*/
/* [*]	This code does not calculate treated outcomes, untreated 	*/
/*	outcomes, and treatment effects.				*/
/*									*/
/* OUTPUT:								*/
/* [*]	global_polynomial_all_specs: This output file		*/
/*	contains all data necessary for replicating the above-mentioned	*/
/*	figures in the paper.					 	*/
/*----------------------------------------------------------------------*/

* Set up display options
clear
set type double
set more off, permanently

* Set matrix options
set matsize 5000

*************************************************
* OTHER SETUP			*
*************************************************

* Set up seeds
global Y_num_seed = 	6574358
		
*********************************************
* VARIABLES AND CONTROLS	  	*
*********************************************	

* Input data set	
local indata = "$final/oregonnumhh1.dta"

* Outcome variables
//local Ys "Y_any "
local Ys "Y_num"

* Number of bootstrap replications
* The first replication of the bootstrap process has no re-sampling; in 
* other words the first replication always runs on the full original 
* non-bootstrapped sample.
local reps = 1001
	
* Endogenous variable	
local D		"any_medicaid"

* Instrument
local Z		"Z"

* Define the list of specifications to use
* We also use the "none" specification (i.e., calculate the linear MTE 
* without covariates).
* A side note on covariates: One might not get the same results if they 
* include dummy variables vs. continuous variables in the regressions due to
* presence of missing values for some continuous variables.
local specs "none common pre_all"


* Output Excel file for bootstrapped statistics
local out_file "global_polynomial_all_specs"
	
*********************************************
* SPECIFICATION #1 SETUP	  	*
*********************************************	
* Specification #1: No covariates
	
* Define the functional form of the MTE (1 = linear, 2 = quadratic,
* 3 = cubic, etc.)
* Without covariates, we can only calculate a linear MTE(p), 
* so fcnform_none can only equal 1
local fcnform_none 	"1"

* Define the covariates (Xs) to use for this specification and the 
* interactions of covariates with Z (Zs)
local Xs_none	"w"
local Zs_none	"w"
			
*********************************************
* SPECIFICATION #2 SETUP	  	*
*********************************************	
* Specification #2: Common covariates (including their two-way interactions)
	
* Define the functional form of the MTE (1 = linear, 2 = quadratic, 
* 3 = cubic, etc.)
local fcnform_common 	"1"

* Define the covariates (Xs) to use for this specification and the 
* interactions of covariates with Z (Zs) -- these covariates were defined in the
* ohie_data_setup.do file.
local Xs_common	"_X*"
local Zs_common	"_Z*"

*********************************************
* SPECIFICATION #3 SETUP	  	*
*********************************************	
* Specification #3: 0, 1-3, or 4+ ER-visits
	
* Define the functional form of the MTE (1 = linear, 2 = quadratic, 
* 3 = cubic, etc.)
local fcnform_pre_all 	"1"

* Define the covariates (Xs) to use for this specification and the 
* interactions of covariates with Z (Zs) -- these covariates were defined in the
* ohie_data_setup.do file.
local Xs_pre_all	"Y_num_pre0 Y_num_pre1_3 Y_num_pre4"
local Zs_pre_all	"_pre_Z_Y_num0 _pre_Z_Y_num1_3 _pre_Z_Y_num4"

	
*********************************************
* RUN CODE  	*
*********************************************	

* Begin looping through outcomes
foreach Y of local Ys {
	
	* Initialize the seed
	set seed $`Y'_seed
	
	* Initialize an output matrix for storing the results for each 
	* specification and functional form
	foreach spec of local specs {	
	
		* Store Xs and Zs so that we can replace tremporarily with outcome-specific covariates
		local Xs_`spec'_temp = "`Xs_`spec''"
		local Xs_`spec' = subinstr("`Xs_`spec''","OUTCOME","`Y'",.)
		
		local Zs_`spec'_temp = "`Zs_`spec''"
		local Zs_`spec' = subinstr("`Zs_`spec''","OUTCOME","`Y'",.)
		
		* Matrix
		foreach f of local fcnform_`spec' {		
			matrix `spec'_`Y'_FN`f' = J(`reps', 500, .)
			
			* Initialize the list for renaming variables later on
			local renamevars_`spec'_`f' ""
		}
	}
	
	* Begin bootstrapping loop
	forval rep = 1/`reps' {
			
	*********************************************
	* SETUP	  	*
	*********************************************	
	
		* Set up
		use "`indata'", clear
			
		* Drop observations where the outcome is missing before
		* re-sampling for the bootstrapping
		* The reason to drop observations with a missing outcome 
		* before re-sampling is twofold:
		* (a) By dropping missing outcomes, we cannot oversample
		*  individuals with missing values when bootstrappin
		* (b) By dropping missing outcomes, we ensure that every 
		* bootstrap sample is of equal size
		keep if `Y' != . 

		* Re-sample for bootstrapping
		* Because we are comparing the MTEs across different
		* specifications, we do not drop 
		* individuals with g covariates before bootstrapping 
		* so that we keep the bootstrap sample consistent across 
		* specifications. However, we still drop individuals with 
		* missing values for the covariates after resampling
		* for the bootstrapping
		quietly if `rep' > 1 {
			bsample
		}
		
		* Save the data set
		save "$final/_temp_`Y'.dta", replace
		
		sleep 3000 //Pause stata to let it save the file properly before using it.
		* Loop through all specifications
				
		* Begin looping through specifications
		* We resample for bootstrapping first and then loop through 
		* the specifications.
		* This ensures that we estimate all specifications on the same 
		* bootstrapped sample before moving on to re-sample again.
		foreach spec of local specs {
			
			if `rep'==1 count
						
			* Begin looping through functional forms (the paper only
			* uses linear. However, the code can be modified to 
			* accommodate higher-order polynomials.				
			foreach f of local fcnform_`spec' {
						
				* Set up iterator for outputing to the matrix
				local matrixiter = 1
			
				* Output to log  -for tracking purposes
				di "Estimating MTE for outcome `Y', specification `spec', functional form `f', bootstrap replication `rep'"
						
			*********************************************
			* PROPENSITY SCORE ESTIMATION  	*
			*********************************************
			
				use "$final/_temp_`Y'.dta", clear
						
				* Estimate propensity score using Z, Xs, and the interactions between Z and the Xs as the 
				* covariates and D as the dependent variable. We use a linear regression to predict the 
				* propensity score.  Please note that we do not censor the propensity scores here. We keep the 
				* uncensored propensity scores for the MTE estimation, however, we do censor them for 
				* estimating any treatment effects from an MTE with covariates. Propensity scores are not 
				* censored when estimating the linear MTE without covariates.					
				quietly reg `D' `Z' `Xs_`spec'' `Zs_`spec''
				predict p_Z
				
				* Diagnostics
				di "Number of individuals with propensity scores >1"
				count if p_Z>1 & p_Z !=.
				
				di "Number of individuals with propensity scores <0"
				count if p_Z<0 & p_Z !=.
				
				di "Summary statistics for the estimated propensity score"
				su p_Z
					
				* Drop the propensity score if it's equal to missing. This implies that we are dropping  
				* individuals who have missing covariates from the MTE estimation.
				drop if p_Z==.

				* Save the data set										
				compress						
				save "$final/_glpl_`Y'_`spec'", replace			
				
			*********************************************
			* ESTIMATE ATO(p), AUO(p), beta_T AND beta_U *				
			*********************************************
			* This estimation algorithm is detailed in Appendix D of the paper
				
				* Use the separate estimation approach to calculate ATO(p) and AUO(p). The separate 
				* estimation approach means that we estimate the ATO(p) and beta_T among the treated 
				* (individuals with D=1), and the AUO(p) among the untreated (individuals with D=0) by
				* running separate regressions.
				forval d = 0/1 {
			
					use "$final/_glpl_`Y'_`spec'", clear
				
					* Restrict sample (e.g., conditional on D=1, conditional on D=0)
					keep if `D' == `d'
					
					* Based on the pre-specified functional form of g(p), generate the variables 
					* for the polynomial and store them in macros g(p) is interpreted as the polynomial 
					* components of p and form the basis of what will become ATO(p) and AUO(p).							
					local p_poly ""
					forvalues n=1/`f' {
						gen p_Z_`n' = p_Z^(`n')
						local p_poly "`p_poly' p_Z_`n'"
					}				
					
					* Estimate beta_T and ATO(p) if running the regression on the treated, 
					* otherwise we are estimating beta_U and AUO(p)
				
					* Regress Y on Xs and specified functional form of p, what we call g(p). In this 
					* regression, we do not include the Zs (the interactions between Z and the 
					* covariates). We use a linear regression for estimation. Brinch et al (2015) use 
					* double residual regressions and a local linear regression for estimation. When 
					* estimating a global polynomial, a double residual regression and a regular
					* regression should yield the same results.
					quietly reg `Y' `Xs_`spec'' `p_poly' 
					
					
					if _rc == 0 {
					quietly {
				
					* Save the betas for the covariates in macro variables and in the matrix
					* The coefficients on the covariates from the above regression reflect the 
					* beta_T or beta_U, depending on whether we are running the regression
					* on the treated or the untreated. We save the betas in the matrix 
					* and in the final output data set so that we can use them later on
					* for extrapolation.
					foreach var of varlist `Xs_`spec'' {
			
						* Save as scalar
						scalar define _b_`d'_`var' = _b[`var']
						
						* Save in the output matrix
						matrix `spec'_`Y'_FN`f'[`rep', `matrixiter'] = _b[`var']
						local ++matrixiter
						if `rep'==1 {
							local renamevars_`spec'_`f' "`renamevars_`spec'_`f'' _b_`d'_`var'"
						}
					}
		
					* Save the gammas for g(p) in a separate matrix
					* The coefficients on the g(p) components ,which we call gammas, 
					* from the above regression reflect the components of 
					* ATO(p) or AUO(p), depending on whether we are running
					* the regression on the treated or the untreated.
			
					* Create a matrix that contains the estimates for g(p)
					matrix K_`d' = J(1, `f'+1, .)
			
					local j = 2
					foreach var of varlist `p_poly' {

						matrix K_`d'[1,`j'] = _b[`var']
						local ++j
					}
		
					* Initialize the constant for g(p) to the beta for the omitted category
					* The coefficient on the constant from the regression above is 
					* interpreted as the intercept of ATO(p) or AUO(p), depending on 
					* whether we are running a regression on the treated or the untreated.
					* This implies that both beta_T and beta_U for the omitted category 
					* in the above regression are equal to 0.
					matrix K_`d'[1,1] = _b[_cons]
					}	
				
					}
					
				} // end loop for separate estimation
							
				* Define ATO(p) and AUO(p)
					
				* ATO(p) is defined using the coefficients (gammas) on the g(p) 
				* polynomial components on the regression above that has been run 
				* on the treated individuals only
						
				local temp = `f'+1
				
				matrix ATO_matrix = J(1,`temp',.)
				
				forval m = 1/`temp' {
					matrix ATO_matrix[1,`m'] = K_1[1,`m']
				}
										
				* AUO(p) is defined using the coefficients (gammas) on the g(p) 
				* polynomial components on the regression above that
				* has been run on the untreated individuals only
				
				matrix AUO_matrix = J(1,`temp',.)
				
				forval m = 1/`temp' {
					matrix AUO_matrix[1,`m'] = K_0[1,`m']
				}
											
				* Compute mto(p) and muo(p) from ATO(p) and AUO(p).
				* The derivatives obtained in this section are the 
				* lowercase mto(p) and muo(p). To obtain the MTO(p) and the MUO(p),
				* we need to calculate MTO(p) = beta_T*X + mto(p) and 
				* MUO(p) = beta_U*Z + muo(p). These are not calculated 
				* in this code but the setup is already in place 
				* and they can be easily computed and graphed.
				der_MTO ATO_matrix
				der_MUO AUO_matrix
										
				* Compute the mte(p) as the difference between mto(p) and muo(p)
				* Again, note that the difference between mto(p) and muo(p) is the lowercase mte(p).
				local temp = `f'+1
				matrix mte_matrix = J(1, `temp', .)
					
				forval m=1/`temp' {		
					matrix mte_matrix[1,`m'] = MTO_matrix[1,`m'] - MUO_matrix[1,`m']				
				}	
				
				matrix list mte_matrix	
						
				* Save the mto(p), muo(p), and mte(p) in the 
				* output matrix for the common and firstday and no covariates specifications
				* This will be used in the extrapolation section of the paper.				
				forval m=1/`temp' {
				
					* mto(p)
					matrix `spec'_`Y'_FN`f'[`rep', `matrixiter'] = MTO_matrix[1,`m']
					local ++matrixiter
					if `rep' ==1 {
						local renamevars_`spec'_`f' "`renamevars_`spec'_`f'' MTO_term_`m'"
					}
					di MTO_matrix[1,`m']
					
					* muo(p)
					matrix `spec'_`Y'_FN`f'[`rep', `matrixiter'] = MUO_matrix[1,`m']
					local ++matrixiter
					if `rep' ==1 {
						local renamevars_`spec'_`f' "`renamevars_`spec'_`f'' MUO_term_`m'"
					}
					
					* mte(p)
					matrix `spec'_`Y'_FN`f'[`rep', `matrixiter'] = mte_matrix[1,`m']
					local ++matrixiter
					if `rep' ==1 {
						local renamevars_`spec'_`f' "`renamevars_`spec'_`f'' mte_term_`m'"
					}						
				}
				
				
				* For the specification including controls for 
				* rre-period ER visit, save the intercepts at each bin for 
				* plotting the "linmte_ptiles_Y_num_pre_utilization_line" 
				
				if "`spec'"=="pre_all"{
					foreach var of varlist `Xs_`spec'' {
						matrix `spec'_`Y'_FN`f'[`rep', `matrixiter'] = (_b_1_`var' - _b_0_`var') + mte_matrix[1,1]
						local ++matrixiter
						if `rep'==1 {
							local renamevars_`spec'_`f' "`renamevars_`spec'_`f'' _intercept_`var'"
						}
					}
				}
				
															
			*********************************************
			* ESTIMATE SMTE(p), minMTE(x,p), and maxMTE(x,p) *				
			*********************************************				
						
				use "$final/_glpl_`Y'_`spec'", clear
										
				* Calculate mu_0 = beta_U*X, mu_1 = beta_T*X,
				* and mu= (beta_T - beta_U)'*X for each individual
				* in the sample
				quietly gen double mu_U = 0
				quietly gen double mu_T = 0
				quietly gen double mu = 0
				
				quietly foreach x of varlist `Xs_`spec'' {
					replace mu_U = mu_U + _b_0_`x'*`x'
					replace mu_T = mu_T + _b_1_`x'*`x'
					replace mu   = mu + (_b_1_`x'-_b_0_`x')*`x'
				}
				

				* Save a data set with the computed mu so that we can check who has 
				* the minimum and maximum MTE(x,p) manually.
				
				if "`spec'"=="none" & `f'==1 {
					save "$final/_`Y'_minmax_MTE_none.dta", replace
				}
				
				if "`spec'"=="common" & `f'==1 {
					save "$final/_`Y'_minmax_MTE_common.dta", replace
				}
				
				if "`spec'"=="pre_all" & `f'==1 {
					save "$final/_`Y'_minmax_MTE_pre_all.dta", replace
				}
				
				

				* Calculate the average mu, mu_T and, mu_U across all individuals in the sample
				* If we are estimating the MTE(x,p) for the specification with pre-period ER visits,
				* then also calculate the minimum of the mus.
				
				quietly su mu
				local avgmu = `r(mean)'
				if "`spec'"=="pre_all" local minmu = `r(min)'
				if "`spec'"=="pre_all" local maxmu = `r(max)'
				
				quietly su mu_U
				local avgmu_U = `r(mean)'
				if "`spec'"=="pre_all" local minmu_U = `r(min)'
				if "`spec'"=="pre_all" local maxmu_U = `r(max)'
				
				quietly su mu_T
				local avgmu_T = `r(mean)'
				if "`spec'"=="pre_all" local minmu_T = `r(min)'
				if "`spec'"=="pre_all" local maxmu_T = `r(max)'
				
				* Calculate the SMTE(p); if the specification is with pre-period ER visits, 
				* then also calculate the minMTE(x,p) and maxMTE(x,p)
				
				* Copy the contents of the mte(p) in three separate 
				* matrices for SMTE(p), minMTE(p), and maxMTE(p)
				
				matrix SMTE_matrix = J(1, `temp', .)
				matrix SMUO_matrix = J(1, `temp', .)
				matrix SMTO_matrix = J(1, `temp', .)
				
				if "`spec'"=="pre_all" {
					matrix minMTE_matrix = J(1, `temp', .)
					matrix maxMTE_matrix = J(1, `temp', .)
					
					matrix minMUO_matrix = J(1, `temp', .)
					matrix maxMUO_matrix = J(1, `temp', .)
					
					matrix minMTO_matrix = J(1, `temp', .)
					matrix maxMTO_matrix = J(1, `temp', .)
				}				
			
				forval m=1/`temp' {
				
					matrix SMTE_matrix[1,`m'] = mte_matrix[1,`m']
					matrix SMUO_matrix[1,`m'] = MUO_matrix[1,`m']
					matrix SMTO_matrix[1,`m'] = MTO_matrix[1,`m']
					
					if "`spec'"=="pre_all" {
						matrix minMTE_matrix[1,`m'] = mte_matrix[1,`m']
						matrix maxMTE_matrix[1,`m'] = mte_matrix[1,`m']
						
						matrix minMUO_matrix[1,`m'] = MUO_matrix[1,`m']
						matrix maxMUO_matrix[1,`m'] = MUO_matrix[1,`m']
						
						matrix minMTO_matrix[1,`m'] = MTO_matrix[1,`m']
						matrix maxMTO_matrix[1,`m'] = MTO_matrix[1,`m']
					}							
				}
						
				* Add the avg mu, min mu, and max mu to the appropriate mte(p) matrices
					
				matrix SMTE_matrix[1,1] = SMTE_matrix[1,1] + `avgmu' 	// this gives the intercept of the E[MTE(p,X)] line in Figure "smte_Y_num_binned" of the paper.
				matrix SMUO_matrix[1,1] = SMUO_matrix[1,1] + `avgmu_U'
				matrix SMTO_matrix[1,1] = SMTO_matrix[1,1] + `avgmu_T'

				if "`spec'"=="pre_all" {							
					matrix minMTE_matrix[1,1] = minMTE_matrix[1,1] + `minmu'
					matrix maxMTE_matrix[1,1] = maxMTE_matrix[1,1] + `maxmu'
						
					matrix minMUO_matrix[1,1] = minMUO_matrix[1,1] + `minmu_U'
					matrix maxMUO_matrix[1,1] = maxMUO_matrix[1,1] + `maxmu_U'
					
					matrix minMTO_matrix[1,1] = minMTO_matrix[1,1] + `minmu_T'
					matrix maxMTO_matrix[1,1] = maxMTO_matrix[1,1] + `maxmu_T'	
				}
					
				* Output the SMTE(p), the minMTE(x,p) (if applicable) 
				* and the maxMTE(x,p) (if applicable) to the output matrix
				* The output will contain each component of the SMTE(p) 
				* (or minMTE(x,p) or maxMTE(x,p)) separately, with the suffix 
				* "_term_X", where X=1 for the intercept, X=2 for the 
				* linear component of the polynomial.
					
				forval m=1/`temp' {
				
				* Output SMTE(p), SMUO(p), SMTO(p)
				foreach func in MTE MUO MTO {
				
					matrix list S`func'_matrix
					matrix `spec'_`Y'_FN`f'[`rep', `matrixiter'] = S`func'_matrix[1,`m']
					local ++matrixiter
					if `rep' ==1 {
						local renamevars_`spec'_`f' "`renamevars_`spec'_`f'' S`func'_term_`m'"
					}
					
					* min(x,p)
					if "`spec'"=="pre_all" {
						matrix list min`func'_matrix
						matrix `spec'_`Y'_FN`f'[`rep', `matrixiter'] = min`func'_matrix[1,`m']
						local ++matrixiter
						if `rep' ==1 {
							local renamevars_`spec'_`f' "`renamevars_`spec'_`f'' min`func'_term_`m'"
						}
					}
				
					* max(x,p)
					if "`spec'"=="pre_all" {
						matrix list max`func'_matrix
						matrix `spec'_`Y'_FN`f'[`rep', `matrixiter'] = max`func'_matrix[1,`m']
						local ++matrixiter
						if `rep' ==1 {
							local renamevars_`spec'_`f' "`renamevars_`spec'_`f'' max`func'_term_`m'"
							
						}
					}
					
				} // close function loop
				} // close term loop
					
				
			} // end functional form loop
			
		} // end specification loop
		
	} // end bootstrapping loop
	set trace on
	
	* Loop through all of the matrices generated for each specification and 
	* functional form combination and save them as data sets
	foreach spec of local specs {
	
		foreach f of local fcnform_`spec' {
	
			* Save matrix
			svmat double `spec'_`Y'_FN`f'
					
			* Delete extra variables and observations
			keep `spec'_`Y'_FN`f'*
			
			gen obs_num = _n
			drop if obs_num>`reps'
			drop obs_num
						
			* Rename variables
			local i=1		
			foreach var of local renamevars_`spec'_`f' {			
				rename `spec'_`Y'_FN`f'`i' `var'
				local ++i
			}
			
			drop  `spec'_`Y'_FN`f'*
			
			* Save as data set
			save "$final/mtedata_`spec'_`Y'_FN`f'.dta", replace
		}
	}
	
	foreach spec of local specs {
	
		foreach f of local fcnform_`spec' {		
	
			* Load matrix
			use "$final/mtedata_`spec'_`Y'_FN`f'.dta", clear
			
			* Delete the betas, we do not need to output bootstrap 
			* statistics for these . We do need them for extrapolation
			* though.
			drop _b*
			
			* Output the bootstrap statistics
			bootstrap_statistics $output `out_file' `Y'_FN`f'_`spec'
		}
	}
		
		
	* Replace covariates in Xs and Zs with OUTCOME placeholder
	foreach spec of local specs {
		local Xs_`spec' = "`Xs_`spec'_temp'"
		local Zs_`spec' = "`Zs_`spec'_temp'"
		di "`Xs_`spec''"
	}
		set trace off						
} // end outcomes loop

timer off 1
timer list 1
local hours = `r(t1)'/3600
di "Computing time is `hours' hours"
		
qui log close


